Load libraries

library(dplyr)
library(tidyverse)
library(ggplot2)
library(ggpubr)
library(ggthemes)
library(stats) 
library(moments)
library(grid)
library(formattable)
library(gridExtra)
library(ggsignif)
library(hrbrthemes)
library(plotrix)
library(rstatix)
library(car)
library(plotly)

Load data

data <- read.csv("/Users/nuriteliash/Documents/GitHub/Sofia-varroa-project/EAG_data_all.csv")

EAG stat analysis

the tested effect (x) is the Essential oil “EO” dissolved in acetone, the “response” (y) is the electrophysiological response of the leg measured in mV. Each leg was stimulated by different stimuli in the following order:Air > Air > Air > Acetone (solvent) > 0.1 > 0.25 > 0.5 > 1 > 2.5 > 5 > Acetone (solvent) > Air

the response was normalized…. [please complete in here:)]

to test the difference in response amplitude to the different stimuli, we will use One way ANOVA. then a post-hoc Tukey test, to see which of the stimuli are significantly differnet. we followed this tutorial for ANOVA in R

data summary

# our data contains: 
str(data)
## 'data.frame':    664 obs. of  4 variables:
##  $ leg     : Factor w/ 83 levels "VF1","VF10","VF11",..: 1 1 1 1 1 1 1 1 12 12 ...
##  $ stimuli : Factor w/ 8 levels "0.1","0.25","0.5",..: 8 1 2 3 4 5 6 7 8 1 ...
##  $ EO      : Factor w/ 9 levels "EO4309","EO4444",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ response: num  109.5 118.5 86.6 131.9 108.5 ...
table(data$stimuli, data$EO)
##              
##               EO4309 EO4444 EO4891 EO4899 EO5068 EO5663 EO5714 EO5749 EO5763
##   0.1              8     10     10     10     11      9      9      9      7
##   0.25             8     10     10     10     11      9      9      9      7
##   0.5              8     10     10     10     11      9      9      9      7
##   1                8     10     10     10     11      9      9      9      7
##   2.5              8     10     10     10     11      9      9      9      7
##   5                8     10     10     10     11      9      9      9      7
##   acet_after       8     10     10     10     11      9      9      9      7
##   acet_before      8     10     10     10     11      9      9      9      7

We have tested a total of 9 essential oils, using 83 legs. Each essential oil was tested on at least 7 different legs.

Prior to analysis, we gonna check the ANOVA assumptions:

(1) Outliers

Outliers <- data %>% 
  group_by(stimuli,EO) %>%
  identify_outliers(response)

Outliers
## # A tibble: 50 x 6
##    stimuli EO     leg   response is.outlier is.extreme
##    <fct>   <fct>  <fct>    <dbl> <lgl>      <lgl>     
##  1 0.1     EO4444 VF2      207.  TRUE       FALSE     
##  2 0.1     EO4891 VF78      44.2 TRUE       FALSE     
##  3 0.1     EO4891 VF79     182.  TRUE       FALSE     
##  4 0.1     EO4891 VF82     171.  TRUE       FALSE     
##  5 0.1     EO5068 VF33     139.  TRUE       FALSE     
##  6 0.1     EO5068 VF34     180.  TRUE       TRUE      
##  7 0.1     EO5749 VF55     161.  TRUE       FALSE     
##  8 0.25    EO4444 VF2      183.  TRUE       FALSE     
##  9 0.25    EO4899 VF21     236.  TRUE       TRUE      
## 10 0.25    EO4899 VF29      49.4 TRUE       FALSE     
## # … with 40 more rows
# plot it to detect outliers by specific leg
#data %>% 
  #ggboxplot(x = "stimuli", y = "response", color = "leg")

(2) Normality:

#the dependent variable should be approximately normally distributed in each cell of the design. This can be checked using the Shapiro-Wilk normality test (shapiro_test()
normality <- data %>%
  group_by(stimuli,EO) %>%
  shapiro_test(response)

normality
## # A tibble: 72 x 5
##    stimuli EO     variable statistic      p
##    <fct>   <fct>  <chr>        <dbl>  <dbl>
##  1 0.1     EO4309 response     0.883 0.200 
##  2 0.1     EO4444 response     0.852 0.0622
##  3 0.1     EO4891 response     0.937 0.518 
##  4 0.1     EO4899 response     0.942 0.581 
##  5 0.1     EO5068 response     0.818 0.0165
##  6 0.1     EO5663 response     0.775 0.0106
##  7 0.1     EO5714 response     0.989 0.994 
##  8 0.1     EO5749 response     0.968 0.880 
##  9 0.1     EO5763 response     0.889 0.270 
## 10 0.25    EO4309 response     0.940 0.606 
## # … with 62 more rows
# looks like that for a few EO x stimuli combinations  the response doesnt distribute normally  (p<0.05)
# however, as we have enough replicates (n=10), it is ok to contniue with ANOVA

(3) Homogneity of variance

# Build the linear model
model <- lapply(split(data, data$EO), function(i){
  lm(response ~ stimuli, data = i)
})

# now you can Create a QQ plot of residuals of each EO, for example, essential oil EO4309:
ggqqplot(residuals(model$EO4309))

plot(model$EO4309, 1)

in my opinion, if you detect an outlier, you should take out the entire leg, and then check again the normality

One-Way Anova

as we wish to know the difference in response to stimuli, in each EO, we will use one way Anova, after spliting the data per EO:

anova_perEO <- lapply(split(data, data$EO), function(i){
  anova(lm(response ~ stimuli, data = i))
})
anova_perEO
## $EO4309
## Analysis of Variance Table
## 
## Response: response
##           Df Sum Sq Mean Sq F value Pr(>F)
## stimuli    7   1482  211.75  0.3499 0.9269
## Residuals 56  33894  605.24               
## 
## $EO4444
## Analysis of Variance Table
## 
## Response: response
##           Df Sum Sq Mean Sq F value Pr(>F)
## stimuli    7   5452  778.86  0.7103 0.6634
## Residuals 72  78951 1096.54               
## 
## $EO4891
## Analysis of Variance Table
## 
## Response: response
##           Df Sum Sq Mean Sq F value Pr(>F)
## stimuli    7  10606  1515.2  0.7921 0.5963
## Residuals 72 137723  1912.8               
## 
## $EO4899
## Analysis of Variance Table
## 
## Response: response
##           Df Sum Sq Mean Sq F value Pr(>F)
## stimuli    7   6311   901.6  0.5624 0.7839
## Residuals 72 115422  1603.1               
## 
## $EO5068
## Analysis of Variance Table
## 
## Response: response
##           Df Sum Sq Mean Sq F value Pr(>F)
## stimuli    7  16440  2348.6  1.7911 0.1004
## Residuals 80 104897  1311.2               
## 
## $EO5663
## Analysis of Variance Table
## 
## Response: response
##           Df Sum Sq Mean Sq F value Pr(>F)
## stimuli    7  17143  2449.0  1.4552 0.1994
## Residuals 64 107705  1682.9               
## 
## $EO5714
## Analysis of Variance Table
## 
## Response: response
##           Df Sum Sq Mean Sq F value Pr(>F)
## stimuli    7  17420  2488.6  0.6658    0.7
## Residuals 64 239210  3737.7               
## 
## $EO5749
## Analysis of Variance Table
## 
## Response: response
##           Df Sum Sq Mean Sq F value Pr(>F)
## stimuli    7   4250  607.08  0.4378 0.8748
## Residuals 64  88752 1386.75               
## 
## $EO5763
## Analysis of Variance Table
## 
## Response: response
##           Df  Sum Sq Mean Sq F value Pr(>F)
## stimuli    7   249.4   35.63  0.0885 0.9987
## Residuals 48 19324.4  402.59

Post hoc Tukey test

to test the significant difference of each stimuli concentration from the solvent

tukey <- data %>% 
  group_by(EO) %>% 
  tukey_hsd(response ~ stimuli)
tukey
## # A tibble: 252 x 10
##    EO     term    group1 group2     null.value estimate conf.low conf.high p.adj
##  * <fct>  <chr>   <chr>  <chr>           <dbl>    <dbl>    <dbl>     <dbl> <dbl>
##  1 EO4309 stimuli 0.1    0.25                0     8.73    -30.0      47.5 0.996
##  2 EO4309 stimuli 0.1    0.5                 0    13.1     -25.6      51.9 0.961
##  3 EO4309 stimuli 0.1    1                   0    14.8     -23.9      53.5 0.928
##  4 EO4309 stimuli 0.1    2.5                 0    11.3     -27.4      50.1 0.983
##  5 EO4309 stimuli 0.1    5                   0    11.1     -27.6      49.8 0.985
##  6 EO4309 stimuli 0.1    acet_after          0     3.39    -35.3      42.1 1    
##  7 EO4309 stimuli 0.1    acet_befo…          0     5.22    -33.5      43.9 1    
##  8 EO4309 stimuli 0.25   0.5                 0     4.41    -34.3      43.1 1    
##  9 EO4309 stimuli 0.25   1                   0     6.06    -32.7      44.8 1    
## 10 EO4309 stimuli 0.25   2.5                 0     2.62    -36.1      41.3 1    
## # … with 242 more rows, and 1 more variable: p.adj.signif <chr>

EAG plot

for some reason, i cannot add the leg ID to the hovering text in the box plot, but i can do it in the dot-plot. so in order to detect outliers, we can detect them in the first plot (the box plot), then identify their ID leg in the dot plot:)

# first sort the order of stimuli
data$stimuli <- factor(data$stimuli,levels = c("acet_before", "0.1", "0.25", "0.5", "1", "2.5", "5","acet_after"))

box <- ggplot(data, aes(x = stimuli, y = response)) +
  geom_boxplot(aes(colour = EO)) +
  facet_wrap( ~ EO) +
  theme_linedraw() +
        ggtitle("Varroa foreleg response to different essential oils") +
        xlab("Stimuli amount (microgram)") +
        ylab("Normalized response (%)") +
        theme(axis.text.x=element_text(angle=45, hjust=1))
  
ggplotly(box, tooltip = "leg")
dot <- ggplot(data, aes(x = stimuli, y = response, colour = EO, leg=leg)) +
  geom_point() +
  facet_wrap( ~ EO) +
  theme_linedraw() +
        ggtitle("Varroa foreleg response to different essential oils") +
        xlab("Stimuli amount (microgram)") +
        ylab("Normalized response (%)") +
        theme(axis.text.x=element_text(angle=45, hjust=1))
ggplotly(dot, tooltip = c("leg","response"))